########### MANUSCRIPT: The Role of Education and Attitudes in Cooking Fuel Choice: Evidence from two states in India
########### JOURNAL: ENERGY FOR SUSTAINABLE DEVELOPMENT
########### AUTHORS: CARLOS F. GOULD/1 AND JOHANNES URPELAINEN/2
########### AFFILIATIONS: 1/COLUMBIA UNIVERSITY MAILMAN SCHOOL OF PUBLIC HEALTH AND 2/JOHNS HOPKINS SCHOOL OF ADVANCED INTERNATIONAL STUDIES
########### PURPOSE: THIS IS CODE #1 THAT LOADS DATA AND DOES PROCESSING

# FIRST WE DOWNLOAD AND PROCESS DATA FROM KERALA
# THEN, LATER, WE REPEAT THE PROCESS FOR DATA FROM RAJASTHAN


# Kerala -------------------------------------------------------


##################
### DATA        ##
##################

kerala <- read_sav("Data/Kerala_Consumer_Segment_data_250416.sav")

##################
### VARIABLES   ##
##################


#####################
## MAIN EXPOSURE VARIABLES
#####################

### chief wage earner education

kerala$Education_4_CWE <- 
  ifelse(kerala$q22==1, 1,
         ifelse(kerala$q22==2, 1,
                ifelse(kerala$q22==3, 2,
                       ifelse(kerala$q22==4, 3,
                              ifelse(kerala$q22==5, 3,
                                     ifelse(kerala$q22>=6, 4,
                                            NA))))))

kerala$Education_CWE_illiterate <- ifelse(kerala$Education_4_CWE==1, 1, 0)
kerala$Education_CWE_primary <- ifelse(kerala$Education_4_CWE==2, 1, 0)
kerala$Education_CWE_middle_high <- ifelse(kerala$Education_4_CWE==3, 1, 0)
kerala$Education_CWE_greaterhigh <- ifelse(kerala$Education_4_CWE==4, 1, 0)


### highest education level achieved by a female

kerala$Education_female_4_category_new <- 
  ifelse(kerala$q23a2_1>0, 0, 
         ifelse(kerala$q23a2_2>0, 0, 
                ifelse(kerala$q23a2_3>0, 1,
                       ifelse(kerala$q23a2_4>0, 2,
                              ifelse(kerala$q23a2_5>0, 2,
                                     ifelse(kerala$q23a2_6>0, 3,
                                            ifelse(kerala$q23a2_7>0, 3,
                                                   ifelse(kerala$q23a2_8>0, 3,
                                                          ifelse(kerala$q23a2_9>0, 3,
                                                                 NA)))))))))

kerala$Education_female_illiterate <- ifelse(kerala$Education_female_4_category_new==0, 1, 0)
kerala$Education_female_primary <- ifelse(kerala$Education_female_4_category_new==1, 1, 0)
kerala$Education_female_middle_high <- ifelse(kerala$Education_female_4_category_new==2, 1, 0)
kerala$Education_female_greaterhigh <- ifelse(kerala$Education_female_4_category_new==3, 1, 0)

#####################
## COVARIATES
#####################

# household size
kerala$HHSize <- kerala$q21a_1 + kerala$q21a_2 + kerala$q21a_3 + kerala$q21b_1 + kerala$q21b_2 + kerala$q21b_3

# is there a child under 5
kerala$Presenceofchild <- ifelse((kerala$q21a_3 + kerala$q21b_3) > 0, 1,
                                 0)
# number of children under 18
kerala$NumberChildrenUnder18 <- kerala$q21a_2 + kerala$q21a_3 + kerala$q21b_2 + kerala$q21b_3
# number of children under 5
kerala$NumberChildrenUnder5 <-kerala$q21a_3 + kerala$q21b_3
# number of adults
kerala$NumberAdults <-kerala$q21a_1 + kerala$q21b_1

# total expenditures
kerala$totalexpenditure <- rowSums(kerala[,c(97:108)])

#Caste
kerala$Caste_General <- ifelse(kerala$q27==1, 1, 0)
kerala$Caste_NotGeneral <- ifelse(kerala$q27!=1, 1, 0)
kerala$Caste_OBC <- ifelse(kerala$q27==2, 1, 0)
kerala$Caste_ScheduledCaste <- ifelse(kerala$q27==3, 1, 0)
kerala$Caste_ScheduledTribe <- ifelse(kerala$q27==4, 1, 0)
kerala$Caste_DKCS <- ifelse(kerala$q27==5, 1, 0)

#Religion
kerala$Religion_Hindu <- ifelse(kerala$q26==1, 1, 0)
kerala$Religion_Muslim <- ifelse(kerala$q26==2, 1, 0)
kerala$Religion_Christian <- ifelse(kerala$q26==3, 1, 0)
kerala$Religion_Other <- ifelse(kerala$q26>3, 1, 0)
kerala$Religion_NonHindu <- ifelse(kerala$q26!=1, 1, 0)

kerala$Rural <- ifelse(kerala$q2==1, 1, 0)
kerala$District <- kerala$q1
kerala$AgeRespondent <- kerala$q19_1
kerala$Exp_Normalized <- (kerala$totalexpenditure-min(kerala$totalexpenditure))/(max(kerala$totalexpenditure)-min(kerala$totalexpenditure))
kerala$Exp_log <- log(kerala$totalexpenditure)

# LPG costs
kerala$LPG_cylinder_cost <- kerala$q127a_1
kerala$LPG_cylinder_cost_100 <- kerala$q127a_1 / 100
kerala$LPG_yearswith <- kerala$q124

#####################
####### OUTCOME
#####################

##### LPG ownership
kerala$Owns_LPG <- ifelse(kerala$q62a_9==1,1,0)

##### LPG use
kerala$LPG_days <- kerala$q126_1
kerala$LPG_cylinderperyear <- 365 / kerala$LPG_days
kerala$LPG_kgperyear <- kerala$LPG_cylinderperyear / 14.2
kerala$LPG_kgperyear_person <- kerala$LPG_kgperyear / kerala$HHSize

kerala$LPG_regularuse <- kerala$q132

#### Firewood use

kerala$Firewood_timespurchased_year <- kerala$q76b_1
kerala$Firewood_kgpurchased_onetime <- kerala$q76c_1
kerala$Firewood_rupeespaid_onetime <- kerala$q76d_1

kerala$Firewood_collected <- kerala$q81a_1
kerala$Firewood_timescollected_month <- kerala$q83a1_1
kerala$Firewood_minutesspent_onetime <- kerala$q83a2_1

kerala$Firewood_kg_month <- kerala$q75b_1
kerala$Firewood_kg_year <- kerala$Firewood_kg_month * 12
kerala$Firewood_kgperyear_person <- kerala$Firewood_kg_year / kerala$HHSize


#####################
####### PERCEPTION VARIABLES
#####################

########### FIRST, SOME VARIABLES NEED TO BE INVERTED SO THAT ALL PERCEPTIONS ARE ORIENTED THE SAME (HIGHER = THEORIZED GREAT LIKELIHOOD OF LPG ADOPT)

#Firewood is easily available, one can easily collect firewood from the premises / neighborhood 
kerala$Perception_Firewood_Avail_Rev <- ifelse(kerala$q145a_14==1, 5, 
                                               ifelse(kerala$q145a_14==2, 4, 
                                                      ifelse(kerala$q145a_14==3, 3, 
                                                             ifelse(kerala$q145a_14==4, 2, 
                                                                    ifelse(kerala$q145a_14==5, 1,
                                                                           NA)))))

#I get free firewood that is the reason I’m using firewood
kerala$Perception_Firewood_Free_Rev <- ifelse(kerala$q145a_27==1, 5, 
                                              ifelse(kerala$q145a_27==2, 4, 
                                                     ifelse(kerala$q145a_27==3, 3, 
                                                            ifelse(kerala$q145a_27==4, 2, 
                                                                   ifelse(kerala$q145a_27==5, 1,
                                                                          NA)))))

#I  can not afford LPG cylinder/Induction stove at all


kerala$Perception_LPG_CannotAfford_Atall_Rev <- ifelse(kerala$q145a_28==1, 5, 
                                                       ifelse(kerala$q145a_28==2, 4, 
                                                              ifelse(kerala$q145a_28==3, 3, 
                                                                     ifelse(kerala$q145a_28==4, 2, 
                                                                            ifelse(kerala$q145a_28==5, 1,
                                                                                   NA)))))

#I can not afford regular cooking on LPG cylinder/Induction stove
kerala$Perception_LPG_CannotAfford_Regular_Rev <- ifelse(kerala$q145a_29==1, 5, 
                                                         ifelse(kerala$q145a_29==2, 4, 
                                                                ifelse(kerala$q145a_29==3, 3, 
                                                                       ifelse(kerala$q145a_29==4, 2, 
                                                                              ifelse(kerala$q145a_29==5, 1,
                                                                                     NA)))))


#I prefer not to tell my husband or anyone in family, even if I am not well 
kerala$Perception_PreferNotTell_Sick_Rev <- ifelse(kerala$q145a_24==1, 5, 
                                                   ifelse(kerala$q145a_24==2, 4, 
                                                          ifelse(kerala$q145a_24==3, 3, 
                                                                 ifelse(kerala$q145a_24==4, 2, 
                                                                        ifelse(kerala$q145a_24==5, 1,
                                                                               NA)))))



########### SECOND, SUM INDIVIDUAL PERCEPTIONS TO MAKE PERCEPTION INDICES. 


# Chulha and firewood are valuable ]]] NOT REVERSED
kerala$Perception_Index_FirewoodValuable <- rowSums(kerala[,c("q145a_1", "q145a_2", "q145a_3", "q145a_4", "q145a_5", "q145a_6")])

# Chulha is difficult
kerala$Perception_Index_ChulhaDifficult <- rowSums(kerala[,c("q145a_11", "q145a_12", "q145a_13")])

# Firewood is not available and inconvenient
kerala$Perception_Index_FirewoodUnavailableDifficult <- rowSums(kerala[,c("q145a_15", "Perception_Firewood_Avail_Rev", "q145a_16", "q145a_17", "q145a_18", "Perception_Firewood_Free_Rev")])

# Health attitudes in cooking
kerala$Perception_Index_CookingHealth <- rowSums(kerala[,c("q145a_20", "q145a_21", "q145a_22", "q145a_23")])

# Health attitudes: generally progressive
kerala$Perception_Index_GeneralHealth <- rowSums(kerala[,c("Perception_PreferNotTell_Sick_Rev", "q145a_25", "q145a_26")])

# LPG is good and affordable
kerala$Perception_Index_LPGGoodAffordable <- rowSums(kerala[,c("q145a_10", "q145a_19", "Perception_LPG_CannotAfford_Atall_Rev", "Perception_LPG_CannotAfford_Regular_Rev", "q145a_30")])

########### FINALLY, NORMALIZE THE PERCEPTION INDICES

kerala$Perception_Index_FirewoodValuable_Norm <- (kerala$Perception_Index_FirewoodValuable-min(kerala$Perception_Index_FirewoodValuable)) / (max(kerala$Perception_Index_FirewoodValuable)-min(kerala$Perception_Index_FirewoodValuable))

kerala$Perception_Index_ChulhaDifficult_Norm <- (kerala$Perception_Index_ChulhaDifficult-min(kerala$Perception_Index_ChulhaDifficult)) / (max(kerala$Perception_Index_ChulhaDifficult)-min(kerala$Perception_Index_ChulhaDifficult))

kerala$Perception_Index_FirewoodUnavailableDifficult_Norm <- (kerala$Perception_Index_FirewoodUnavailableDifficult-min(kerala$Perception_Index_FirewoodUnavailableDifficult)) / (max(kerala$Perception_Index_FirewoodUnavailableDifficult)-min(kerala$Perception_Index_FirewoodUnavailableDifficult))

kerala$Perception_Index_CookingHealth_Norm <- (kerala$Perception_Index_CookingHealth-min(kerala$Perception_Index_CookingHealth)) / (max(kerala$Perception_Index_CookingHealth)-min(kerala$Perception_Index_CookingHealth))

kerala$Perception_Index_GeneralHealth_Norm <- (kerala$Perception_Index_GeneralHealth-min(kerala$Perception_Index_GeneralHealth)) / (max(kerala$Perception_Index_GeneralHealth)-min(kerala$Perception_Index_GeneralHealth))

kerala$Perception_Index_LPGGoodAffordable_Norm <- (kerala$Perception_Index_LPGGoodAffordable-min(kerala$Perception_Index_LPGGoodAffordable)) / (max(kerala$Perception_Index_LPGGoodAffordable)-min(kerala$Perception_Index_LPGGoodAffordable))


##################
### SUBSETS   ##
##################

kerala$q127a_1 <- ifelse(kerala$q127a_1 > 1500, NA, kerala$q127a_1)

kerala$q127a_1_100 <- kerala$q127a_1 / 100

kerala <- transform(kerala, q127a_1_100 = zoo::na.aggregate(q127a_1_100, by = q3a_3))

kerala_lpgcost <- kerala %>%
  group_by(q3a_3) %>%
  mutate(q127a_1_100_avg = mean(q127a_1_100)) %>%
  ungroup()

# Subsets of combined education categories
kerala_education_CWE_illiteratenoformal <- subset(kerala, kerala$Education_CWE_illiterate==1)
kerala_education_CWE_primary <- subset(kerala, kerala$Education_CWE_primary==1)
kerala_education_CWE_middlehigh <- subset(kerala, kerala$Education_CWE_middle_high==1)
kerala_education_CWE_greaterhigh <- subset(kerala, kerala$Education_CWE_greaterhigh==1)

kerala_education_female_illiteratenoformal <- subset(kerala, kerala$Education_female_illiterate==1)
kerala_education_female_primary <- subset(kerala, kerala$Education_female_primary==1)
kerala_education_female_middlehigh <- subset(kerala, kerala$Education_female_middle_high==1)
kerala_education_female_greaterhigh <- subset(kerala, kerala$Education_female_greaterhigh==1)


# Subsets for exploring additional associations
kerala_belowmedian_income <- kerala %>% subset(Exp_log<=median(kerala$Exp_log))
kerala_abovemedian_income <- kerala %>% subset(Exp_log>median(kerala$Exp_log))

kerala_income1 <- kerala %>% subset(Exp_log<=quantile(kerala$Exp_log, probs = 1/3))
kerala_income2 <- kerala %>% subset(Exp_log>quantile(kerala$Exp_log, probs = 1/3) & 
                                      Exp_log<=quantile(kerala$Exp_log, probs = 2/3))
kerala_income3 <- kerala %>% subset(Exp_log>quantile(kerala$Exp_log, probs = 2/3))


kerala_urban <- kerala %>% subset(Rural==0)
kerala_rural <- kerala %>% subset(Rural==1)

kerala_belowmedian_income_rural <- kerala_belowmedian_income %>% subset(Rural==1)
kerala_belowmedian_income_urban <- kerala_belowmedian_income %>% subset(Rural==0)
kerala_abovemedian_income_rural <- kerala_abovemedian_income %>% subset(Rural==1)
kerala_abovemedian_income_urban <- kerala_abovemedian_income %>% subset(Rural==0)

# Has LPG or no
kerala_lpg <- subset(kerala, kerala$Owns_LPG==1)
kerala_nolpg <- subset(kerala, kerala$Owns_LPG==0)



# Rajasthan -------------------------------------------------------

##################
### DATA        ##
##################

rajasthan <- read_sav("Data/Rajasthan Consumer Segment data_250416.sav")

##################
### VARIABLES   ##
##################


#####################
## MAIN EXPOSURE VARIABLES
#####################

### gender of the chief wage earner

rajasthan$CWE_male <- ifelse(rajasthan$q7==1, 1, 0)

### chief wage earner education

rajasthan$Education_4_CWE <- 
  ifelse(rajasthan$q22==1, 1,
         ifelse(rajasthan$q22==2, 1,
                ifelse(rajasthan$q22==3, 2,
                       ifelse(rajasthan$q22==4, 3,
                              ifelse(rajasthan$q22==5, 3,
                                     ifelse(rajasthan$q22>=6, 4,
                                            NA))))))

rajasthan$Education_CWE_illiterate <- ifelse(rajasthan$Education_4_CWE==1, 1, 0)
rajasthan$Education_CWE_primary <- ifelse(rajasthan$Education_4_CWE==2, 1, 0)
rajasthan$Education_CWE_middle_high <- ifelse(rajasthan$Education_4_CWE==3, 1, 0)
rajasthan$Education_CWE_greaterhigh <- ifelse(rajasthan$Education_4_CWE==4, 1, 0)

### education level of CWE's partner

rajasthan$Education_4_Partner <- 
  ifelse(rajasthan$q22b==1, 1,
         ifelse(rajasthan$q22b==2, 1,
                ifelse(rajasthan$q22b==3, 2,
                       ifelse(rajasthan$q22b==4, 3,
                              ifelse(rajasthan$q22b==5, 3,
                                     ifelse(rajasthan$q22b>=6, 4,
                                            NA))))))

rajasthan$Education_4_FemaleSpouse <- ifelse(rajasthan$CWE_male==1, rajasthan$Education_4_Partner, NA)

rajasthan$Education_4_FemaleSpouse_illiterate <- ifelse(rajasthan$Education_4_FemaleSpouse==1, 1, 0)
rajasthan$Education_4_FemaleSpouse_primary <- ifelse(rajasthan$Education_4_FemaleSpouse==2, 1, 0)
rajasthan$Education_4_FemaleSpouse_middle_high <- ifelse(rajasthan$Education_4_FemaleSpouse==3, 1, 0)
rajasthan$Education_4_FemaleSpouse_greaterhigh <- ifelse(rajasthan$Education_4_FemaleSpouse==4, 1, 0)




### highest education level achieved by a female
### DOES NOT WORK VERY WELL

rajasthan$Education_female_4_category_new <- 
  ifelse(rajasthan$q23a2_1>0, 0, 
         ifelse(rajasthan$q23a2_2>0, 0, 
                ifelse(rajasthan$q23a2_3>0, 1,
                       ifelse(rajasthan$q23a2_4>0, 2,
                              ifelse(rajasthan$q23a2_5>0, 2,
                                     ifelse(rajasthan$q23a2_6>0, 3,
                                            ifelse(rajasthan$q23a2_7>0, 3,
                                                   ifelse(rajasthan$q23a2_8>0, 3,
                                                          ifelse(rajasthan$q23a2_9>0, 3,
                                                                 NA)))))))))

rajasthan$Education_female_all_illiterate <- ifelse(rajasthan$q23a2_1>0, 1, 0)
rajasthan$Education_female_all_noformal <- ifelse(rajasthan$q23a2_2>0, 1, 0)
rajasthan$Education_female_all_primary <- ifelse(rajasthan$q23a2_3>0, 1, 0)
rajasthan$Education_female_all_middle <- ifelse(rajasthan$q23a2_4>0, 1, 0)
rajasthan$Education_female_all_high <- ifelse(rajasthan$q23a2_5>0, 1, 0)
rajasthan$Education_female_all_secondary <- ifelse(rajasthan$q23a2_6>0, 1, 0)
rajasthan$Education_female_all_diploma <- ifelse(rajasthan$q23a2_7>0, 1, 0)
rajasthan$Education_female_all_graduate <- ifelse(rajasthan$q23a2_8>0, 1, 0)
rajasthan$Education_female_all_postgraduate <- ifelse(rajasthan$q23a2_9>0, 1, 0)

rajasthan$Education_female_illiterate <- ifelse(rajasthan$Education_female_all_illiterate>0 | rajasthan$Education_female_all_noformal>0, 1, 0)
rajasthan$Education_female_primary <- ifelse(rajasthan$Education_female_all_primary>0, 1, 0)
rajasthan$Education_female_middle_high <- ifelse(rajasthan$Education_female_all_middle>0 | rajasthan$Education_female_all_high>0, 1, 0)
rajasthan$Education_female_greaterhigh <- ifelse(rajasthan$Education_female_all_secondary>0 |
                                                   rajasthan$Education_female_all_diploma>0 |
                                                   rajasthan$Education_female_all_graduate>0 |
                                                   rajasthan$Education_female_all_postgraduate>0, 1, 0)



#####################
## COVARIATES
#####################

# household size
rajasthan$HHSize <- rajasthan$q21a_1 + rajasthan$q21a_2 + rajasthan$q21a_3 + rajasthan$q21b_1 + rajasthan$q21b_2 + rajasthan$q21b_3

# is there a child under 5
rajasthan$Presenceofchild <- ifelse((rajasthan$q21a_3 + rajasthan$q21b_3) > 0, 1,
                                    0)
# number of children under 18
rajasthan$NumberChildrenUnder18 <- rajasthan$q21a_2 + rajasthan$q21a_3 + rajasthan$q21b_2 + rajasthan$q21b_3
# number of children under 5
rajasthan$NumberChildrenUnder5 <-rajasthan$q21a_3 + rajasthan$q21b_3
# number of adults
rajasthan$NumberAdults <-rajasthan$q21a_1 + rajasthan$q21b_1

# total expenditures
rajasthan$totalexpenditure <- rowSums(rajasthan[,c(90:101)])

#Caste
rajasthan$Caste_General <- ifelse(rajasthan$q27==1, 1, 0)
rajasthan$Caste_MotGeneral <- ifelse(rajasthan$q27!=1, 1, 0)
rajasthan$Caste_OBC <- ifelse(rajasthan$q27==2, 1, 0)
rajasthan$Caste_ScheduledCaste <- ifelse(rajasthan$q27==3, 1, 0)
rajasthan$Caste_ScheduledTribe <- ifelse(rajasthan$q27==4, 1, 0)
rajasthan$Caste_DKCS <- ifelse(rajasthan$q27==5, 1, 0)

#Religion
rajasthan$Religion_Hindu <- ifelse(rajasthan$q26==1, 1, 0)
rajasthan$Religion_Muslim <- ifelse(rajasthan$q26==2, 1, 0)
rajasthan$Religion_Christian <- ifelse(rajasthan$q26==3, 1, 0)
rajasthan$Religion_Other <- ifelse(rajasthan$q26>3, 1, 0)
rajasthan$Religion_MotHindu <- ifelse(rajasthan$q26!=1, 1, 0)

rajasthan$Rural <- ifelse(rajasthan$q2==1, 1, 0)
rajasthan$District <- rajasthan$q1new
rajasthan$AgeRespondent <- rajasthan$q19_1
rajasthan$Exp_Normalized <- (rajasthan$totalexpenditure-min(rajasthan$totalexpenditure))/(max(rajasthan$totalexpenditure)-min(rajasthan$totalexpenditure))
rajasthan$Exp_log <- log(rajasthan$totalexpenditure)

# LPG costs
rajasthan$LPG_cylinder_cost <- rajasthan$q127a_1
rajasthan$LPG_cylinder_cost_100 <- rajasthan$q127a_1 / 100
rajasthan$LPG_yearswith <- rajasthan$q124

#####################
####### OUTCOME
#####################

#### Owns LPG
rajasthan$Owns_LPG <- ifelse(rajasthan$q62a_9==1,1,0)


#### LPG use
rajasthan$LPG_days <- rajasthan$q126_1
rajasthan$LPG_cylinderperyear <- 365 / rajasthan$LPG_days
rajasthan$LPG_kgperyear <- rajasthan$LPG_cylinderperyear / 14.2
rajasthan$LPG_kgperyear_person <- rajasthan$LPG_kgperyear / rajasthan$HHSize

rajasthan$LPG_regularuse <- rajasthan$q132

#### Firewood use
rajasthan$Firewood_timespurchased_year <- rajasthan$q76b_1
rajasthan$Firewood_kgpurchased_onetime <- rajasthan$q76c_1
rajasthan$Firewood_rupeespaid_onetime <- rajasthan$q76d_1

rajasthan$Firewood_collected <- rajasthan$q81a_1
rajasthan$Firewood_timescollected_month <- rajasthan$q83a1_1
rajasthan$Firewood_minutesspent_onetime <- rajasthan$q83a2_1

rajasthan$Firewood_kg_month <- rajasthan$q75b_1
rajasthan$Firewood_kg_year <- rajasthan$Firewood_kg_month * 12
rajasthan$Firewood_kgperyear_person <- rajasthan$Firewood_kg_year / rajasthan$HHSize


#### Perceptions indices

rajasthan$q145a_34_rev <- ifelse(rajasthan$q145a_34==1, 5, 
                                 ifelse(rajasthan$q145a_34==2, 4, 
                                        ifelse(rajasthan$q145a_34==3, 3, 
                                               ifelse(rajasthan$q145a_34==4, 2, 
                                                      ifelse(rajasthan$q145a_34==5, 1,
                                                             NA)))))


rajasthan$q145a_49_rev <- ifelse(rajasthan$q145a_49==1, 5, 
                                 ifelse(rajasthan$q145a_49==2, 4, 
                                        ifelse(rajasthan$q145a_39==3, 3, 
                                               ifelse(rajasthan$q145a_49==4, 2, 
                                                      ifelse(rajasthan$q145a_49==5, 1,
                                                             NA)))))

rajasthan$q145a_45_rev <- ifelse(rajasthan$q145a_45==1, 5, 
                                 ifelse(rajasthan$q145a_45==2, 4, 
                                        ifelse(rajasthan$q145a_45==3, 3, 
                                               ifelse(rajasthan$q145a_45==4, 2, 
                                                      ifelse(rajasthan$q145a_45==5, 1,
                                                             NA)))))

###############
### STRAIGHT SUM INDICES

# LPG is good
rajasthan$Perception_Index_LPGGood <- rowSums(rajasthan[,c("q145a_37", "q145a_43", "q145a_45_rev","q145a_48")])

# LPG is affordable
rajasthan$Perception_Index_LPGaffordable <- rowSums(rajasthan[,c("q145a_34_rev","q145a_35","q145a_50","q145a_51")])

# Cooking fuel choice affects health
rajasthan$Perception_Index_CookingHealth <- rowSums(rajasthan[,c("q145a_39","q145a_40","q145a_42")])

# Chulha and firewood are valuable ]]] NOT REVERSED
rajasthan$Perception_Index_FirewoodValuable <- rowSums(rajasthan[,c("q145a_31","q145a_32","q145a_33","q145a_36", "q145a_38","q145a_47")])

# Firewood is not available and inconvenient
rajasthan$Perception_Index_FirewoodUnavailableDifficult <- rowSums(rajasthan[,c("q145a_41","q145a_44","q145a_16","q145a_17",
                                                                                "q145a_46","q145a_49_rev")])

########### THIRD, NORMALIZE 

rajasthan$Perception_Index_LPGGood <- (rajasthan$Perception_Index_LPGGood-min(rajasthan$Perception_Index_LPGGood)) / 
  (max(rajasthan$Perception_Index_LPGGood)-min(rajasthan$Perception_Index_LPGGood))

rajasthan$Perception_Index_LPGaffordable <- (rajasthan$Perception_Index_LPGaffordable-min(rajasthan$Perception_Index_LPGaffordable)) / 
  (max(rajasthan$Perception_Index_LPGaffordable)-min(rajasthan$Perception_Index_LPGaffordable))

rajasthan$Perception_Index_CookingHealth <- (rajasthan$Perception_Index_CookingHealth-min(rajasthan$Perception_Index_CookingHealth)) / 
  (max(rajasthan$Perception_Index_CookingHealth)-min(rajasthan$Perception_Index_CookingHealth))

rajasthan$Perception_Index_FirewoodValuable <- (rajasthan$Perception_Index_FirewoodValuable-min(rajasthan$Perception_Index_FirewoodValuable)) / 
  (max(rajasthan$Perception_Index_FirewoodValuable)-min(rajasthan$Perception_Index_FirewoodValuable))

rajasthan$Perception_Index_FirewoodUnavailableDifficult <- (rajasthan$Perception_Index_FirewoodUnavailableDifficult - min(rajasthan$Perception_Index_FirewoodUnavailableDifficult, na.rm = TRUE)) / 
  (max(rajasthan$Perception_Index_FirewoodUnavailableDifficult, na.rm = T) - min(rajasthan$Perception_Index_FirewoodUnavailableDifficult, na.rm = T))



##################
### SUBSETS   ##
##################

rajasthan$q127a_1 <- ifelse(rajasthan$q127a_1 > 1500, NA, rajasthan$q127a_1)

rajasthan$q127a_1_100 <- rajasthan$q127a_1 / 100

rajasthan <- transform(rajasthan, q127a_1_100 = zoo::na.aggregate(q127a_1_100, by = q3a_3))

rajasthan_lpgcost <- rajasthan %>%
  group_by(q3a_3) %>%
  mutate(q127a_1_100_avg = mean(q127a_1_100)) %>%
  ungroup()



# Subsets of combined education categories
rajasthan_education_CWE_illiteratenoformal <- subset(rajasthan, rajasthan$Education_CWE_illiterate==1)
rajasthan_education_CWE_primary <- subset(rajasthan, rajasthan$Education_CWE_primary==1)
rajasthan_education_CWE_middlehigh <- subset(rajasthan, rajasthan$Education_CWE_middle_high==1)
rajasthan_education_CWE_greaterhigh <- subset(rajasthan, rajasthan$Education_CWE_greaterhigh==1)

# Subsets of combined female education categories
rajasthan_education_female_illiteratenoformal <- subset(rajasthan, rajasthan$Education_4_FemaleSpouse_illiterate==1)
rajasthan_education_female_primary <- subset(rajasthan, rajasthan$Education_4_FemaleSpouse_primary==1)
rajasthan_education_female_middlehigh <- subset(rajasthan, rajasthan$Education_4_FemaleSpouse_middle_high==1)
rajasthan_education_female_greaterhigh <- subset(rajasthan, rajasthan$Education_4_FemaleSpouse_greaterhigh==1)

# Subsets for exploring additional associations
rajasthan_belowmedian_income <- rajasthan %>% subset(Exp_log<=median(rajasthan$Exp_log))
rajasthan_abovemedian_income <- rajasthan %>% subset(Exp_log>median(rajasthan$Exp_log))

rajasthan_income1 <- rajasthan %>% subset(Exp_log<=quantile(rajasthan$Exp_log, probs = 1/3))
rajasthan_income2 <- rajasthan %>% subset(Exp_log>quantile(rajasthan$Exp_log, probs = 1/3) & 
                                            Exp_log<=quantile(rajasthan$Exp_log, probs = 2/3))
rajasthan_income3 <- rajasthan %>% subset(Exp_log>quantile(rajasthan$Exp_log, probs = 2/3))

rajasthan_urban <- rajasthan %>% subset(Rural==0)
rajasthan_rural <- rajasthan %>% subset(Rural==1)

rajasthan_belowmedian_income_rural <- rajasthan_belowmedian_income %>% subset(Rural==1)
rajasthan_belowmedian_income_urban <- rajasthan_belowmedian_income %>% subset(Rural==0)
rajasthan_abovemedian_income_rural <- rajasthan_abovemedian_income %>% subset(Rural==1)
rajasthan_abovemedian_income_urban <- rajasthan_abovemedian_income %>% subset(Rural==0)


# Merging data sets -------------------------------------------------------


kerala_regs <- kerala[,c("q1",
                         "q126_1", "Owns_LPG", "q127a_1",
                         "Education_CWE_primary", "Education_CWE_middle_high", "Education_CWE_greaterhigh",
                         "Exp_log", "NumberAdults", "NumberChildrenUnder18",
                         "AgeRespondent", 
                         "Caste_OBC", "Caste_ScheduledCaste", "Caste_ScheduledTribe", "Caste_DKCS",
                         "Caste_NotGeneral", "Religion_NonHindu",
                         "Religion_Muslim", "Religion_Christian", "Religion_Other", 
                         "Rural", "District")] %>%
  mutate(state="Kerala")

rajasthan_regs <- rajasthan[,c("q1new",
                               "q126_1","Owns_LPG", "q127a_1",
                               "Education_CWE_primary", "Education_CWE_middle_high", "Education_CWE_greaterhigh",
                               "Exp_log", "NumberAdults", "NumberChildrenUnder18",
                               "AgeRespondent", 
                               "Caste_OBC", "Caste_ScheduledCaste", "Caste_ScheduledTribe", "Caste_DKCS",
                               "Caste_MotGeneral", "Religion_MotHindu",
                               "Religion_Muslim", "Religion_Christian", "Religion_Other", 
                               "Rural", "District")] %>%
  mutate(state="Rajasthan") %>%
  rename("q1" = "q1new",
         "Religion_NonHindu" = "Religion_MotHindu",
         "Caste_NotGeneral"  = "Caste_MotGeneral")



merged_regs <- kerala_regs %>%
  bind_rows(rajasthan_regs)

merged_rural <- merged_regs %>%
  filter(Rural==1)
merged_urban <- merged_regs %>%
  filter(Rural==0)

